Os dados

Lendo e visualizandos os dados do Brasil do COVID fornecidos por

Os dados são atualizados diariamente… Então normalmente a cada dia esse pipeline pode mudar seus resultados.

[1]:

import pandas as pd

# Lendo os dados online - wcota
data_path = 'https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv'

online_data = pd.read_csv(data_path, delimiter=",")
online_data.head()

[1]:
date country state city newDeaths deaths newCases totalCases deathsMS totalCasesMS deaths_per_100k_inhabitants totalCases_per_100k_inhabitants deaths_by_totalCases recovered suspects tests tests_per_100k_inhabitants
0 2020-02-25 Brazil SP TOTAL 0 0 1 1 0 0 0.0 0.00218 0.0 NaN NaN NaN NaN
1 2020-02-25 Brazil TOTAL TOTAL 0 0 1 1 0 0 0.0 0.00048 0.0 NaN NaN NaN NaN
2 2020-02-26 Brazil SP TOTAL 0 0 0 1 0 1 0.0 0.00218 0.0 NaN NaN NaN NaN
3 2020-02-26 Brazil TOTAL TOTAL 0 0 0 1 0 1 0.0 0.00048 0.0 NaN NaN NaN NaN
4 2020-02-27 Brazil SP TOTAL 0 0 0 1 0 1 0.0 0.00218 0.0 NaN NaN NaN NaN

Filtrando e limpando os dados

[2]:

selected_state = "SP"

at_state = online_data['state']==selected_state
local_data = online_data[at_state]
local_data = local_data[local_data.recovered.notnull()]
#local_data = local_data.fillna(method="backfill")

local_data.head()

[2]:
date country state city newDeaths deaths newCases totalCases deathsMS totalCasesMS deaths_per_100k_inhabitants totalCases_per_100k_inhabitants deaths_by_totalCases recovered suspects tests tests_per_100k_inhabitants
347 2020-03-24 Brazil SP TOTAL 10 40 65 810 40 810 0.08711 1.76397 0.04938 1.0 4572.0 NaN NaN
375 2020-03-25 Brazil SP TOTAL 8 48 52 862 48 862 0.10453 1.87722 0.05568 1.0 4300.0 NaN NaN
403 2020-03-26 Brazil SP TOTAL 10 58 191 1053 58 1052 0.12631 2.29317 0.05508 1.0 14312.0 NaN NaN
431 2020-03-27 Brazil SP TOTAL 10 68 170 1223 68 1223 0.14809 2.66338 0.05560 1.0 14312.0 NaN NaN
459 2020-03-28 Brazil SP TOTAL 16 84 183 1406 84 1406 0.18293 3.06191 0.05974 1.0 14312.0 NaN NaN
[3]:

import numpy as np
from datetime import datetime

first_date = local_data["date"].iloc[0]
first_date = datetime.fromisoformat(first_date)

if selected_state == "SP":
#     N = 11869660
    N = 44.01e6
elif selected_state == "TOTAL":
    N = 220e6

I = list()                                       # <- I(t)
R = local_data["recovered"].iloc[1:].to_numpy()  # <- R(t)
M = local_data["newDeaths"].iloc[1:].to_numpy()  # <- M(t)
nR = np.diff(local_data["recovered"].to_numpy()) # <- dR(t)/dt
nC = local_data["newCases"].iloc[1:].to_numpy()  # <- nC(t)/dt

I = [ local_data["totalCases"].iloc[1] ]         # I(0)

# I(t) <- I(t-1) + newCases(t) - newMortes(t) - newRecovered(t)
for t in range(len(M)-1):
    I.append(I[-1] + nC[t] - M[t] - nR[t])
I = np.array(I)

Visualizando a evolução

[18]:

from bokeh.models   import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d, HoverTool
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts  import column
from bokeh.io       import output_notebook

output_notebook()

from datetime import timedelta

# Criando o vetor de tempo
date_vec = [ first_date + timedelta(days=k) for k in range(len(M))]

# Criando os valores para legenda no plot
year =  [str(int(d.year)) for d in date_vec ]
month = [("0"+str(int(d.month)))[-2:] for d in date_vec ]
day =   [("0"+str(int(d.day)))[-2:] for d in date_vec ]



# Criando a fonte de dados
source = ColumnDataSource(data={
    'Data'       : date_vec,
    'd': day, 'm': month, 'y': year,
    'Infectados' : I,
    'Removidos'  : R,
    'Mortes'     : M,
})


# Criando a figura
p = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime",
           tools="",
           toolbar_location=None,
           y_axis_type="log",
           title="Evolução do COVID - São Paulo")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "top_left"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{i}'),
        ( 'Data',       '@d/@m/@y' ),
    ],
    renderers=[
        r_p, i_p, m_p
    ]
))

show(p)

Loading BokehJS ...

O problema

O conjunto de equações diferenciais que caracteriza o modelo é descrito abaixo. No modelo $:nbsphinx-math:beta `- :nbsphinx-math:text{representa a taxa de transmissão ou taxa efetiva de contato}` $ e \(r - \text{a taxa de remoção ou recuperação.}\)

\[\begin{split}\begin{split} \frac{dS(t)}{dt} & = -\beta S(t) I(t) \\ \frac{dI(t)}{dt} & = \beta S(t) I(t) - rI(t) \\ \frac{dR(t)}{dt} & = r I(t) \end{split}\end{split}\]

Gostaríamos de identificar quais parâmetros \(\beta\) e \(r\) resultam num melhor ajuste do modelo para os dados de S,I e R

[5]:

# Importando o modelo SIR
from models import *

sir_model = ss.SIR(pop=N, focus=["I", "R"])

Loading BokehJS ...

Estimando os parâmetros

Para estimarmos os parâmetros do modelo \(\mathbf{\beta}\) e \(\mathbf{r}\), vamos utilizar inicialmente o método de mínimos quadrados. Podemos então formular o problema a partir da Equação abaixo. Na Equação \(y_m(k)\) representa o dado real em cada amostra \(k\); \(y_s(\theta,k)\) representa o valor estimado a partir da simulação do modelo para uma determinada amostra \(k\) e \(\theta\) representa o vetor ed parâmetros \(\theta = [ \beta \; \; r]^T\).

\[min_{\theta}= \sum_{k=1}^{K}(y_m(k) - y_s(\theta,k))^2\]

A equação formula a pergunta: quais os valores de \(beta\) e \(r\) que minizam o erro quadrático quando comparados com os dados reais.

[12]:

import numpy as np

S = N - I - R

time = np.linspace(0, len(I), len(I))

# Estimando os parâmetros
sir_model.fit(S, I, R, time, sample_ponder=True, resample=True, beta_sens=[1000,10], r_sens=[1000,10])

r_included = True

         ├─ Resample from sizes ─  65 65 65 65
         └─ Resample to sizes ─    1540 1540 1540 1540
         ├─ S(0) ─ I(0) ─ R(0) ─  [44009136.41827327, 862.5810526571099, 1.0006740750082481]
         ├─ beta ─   1   r ─   0.14285714285714285
         ├─ beta bound ─   0.001  ─  10
         ├─ r bound ─   0.00014285714285714284  ─  1.4285714285714284
         ├─ equation weights ─   [0.0016004861866103122, 1, 1]
         └─ Defined at:  0.08388235287485474  ─  0.010752489662406836

[13]:
sir_model.parameters[0]/sir_model.parameters[1] # Ro <- \beta * (1 / \r)
[13]:
7.801202838457649
[19]:

# Criando a figura
p1 = figure(plot_height=500,
           plot_width=600,
           x_axis_type="datetime",
           tools="",
           toolbar_location=None,
#            y_axis_type="log",
           title="Evolução do COVID - São Paulo")

# Preparando o estilo
p1.grid.grid_line_alpha = 0
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.yaxis.axis_label = "Indivíduos"
p1.xaxis.axis_label = "Dias"

# Incluindo as curvas
p1.line(time, I, legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885")
#p1.line(time, , legend_label="Mortes", line_cap="round", line_width=3, color="#de425b")
p1.line(time, R, legend_label="Removidos", line_cap="round", line_width=3, color="#99d594")

p1.scatter(sir_model.pipeline["resample"]["after"]["t"],
           sir_model.pipeline["resample"]["after"]["I"],
           marker="circle",line_color="#6666ee", fill_color="#ee6666", fill_alpha=0.5, size=3)

p1.scatter(sir_model.pipeline["resample"]["after"]["t"],
           sir_model.pipeline["resample"]["after"]["R"],
           marker="circle",line_color="#6666ee", fill_color="#ee6666", fill_alpha=0.5, size=3)

# Colocando as legendas
p1.legend.click_policy="hide"
p1.legend.location = "top_left"


show(p1)


[20]:

if r_included:
    initial = [S[0], I[0], R[0]]
else:
    initial = [S[0], I[0]]

results = sir_model.predict(initial, time)

[21]:

# Incluindo os dados de infectados
im_p = p.line(
    date_vec, results[1],
    legend_label="Infectados - Modelo",
    line_width=4,
    line_dash="dashed",
    line_cap="round",
    color="#f57f17"
)

# Incluindo os dados de recuperados
if r_included:
    rm_p = p.line(
        date_vec, results[2],
        legend_label="Removidos - Modelo",
        line_dash="dashed",
        line_width=4,
        line_cap="round",
        color="#1b5e20"
    )

show(p)

Predições utilizando o modelo

[17]:

# Criando os valores de tempo para previsão - 120 dias
t_sim = np.linspace(0, len(I) + 120, len(I) + 120)
date_vec_sim = [first_date + timedelta(days=k) for k in t_sim]

# Prevendo para os valores selecionados
prediction = sir_model.predict(initial, t_sim)



# Criando o gráfico com as predições

# Criando os valores para legenda no plot
year_sim =  [str(int(d.year)) for d in date_vec_sim ]
month_sim = [("0"+str(int(d.month)))[-2:] for d in date_vec_sim ]
day_sim =   [("0"+str(int(d.day)))[-2:] for d in date_vec_sim ]

if r_included:
    accum_Infect = [0]
    for i in N - prediction[1] - prediction[2]:
        accum_Infect.append(accum_Infect[-1]+i)

    accum_Infect = sir_model.parameters[1] * np.array(accum_Infect)

# Criando a fonte de dados
if r_included:
    source = ColumnDataSource(data={
        'Data'       : date_vec,
        'd': day, 'm': month, 'y': year,
        'Infectados' : I,
        'Removidos'  : R,
        'Mortes'     : M,
        'InfecModelo' : prediction[1],
        'RemovModelo' : prediction[2],
        'AccumInfect' : accum_Infect,
        'SucetModelo' : N - prediction[1] - prediction[2],
        'DataModelo'  : date_vec_sim,
        'ds': day_sim, 'ms': month_sim, 'ys': year_sim
    })
else:
    source = ColumnDataSource(data={
        'Data'       : date_vec,
        'd': day, 'm': month, 'y': year,
        'Infectados' : I,
        'Removidos'  : R,
        'Mortes'     : M,
        'InfecModelo' : prediction[1],
        'DataModelo'  : date_vec_sim,
        'ds': day_sim, 'ms': month_sim, 'ys': year_sim
    })


# Criando a figura
p = figure(plot_height=700,
           plot_width=800,
           x_axis_type="datetime",
           tools="",
           toolbar_location=None,
           y_axis_type="log",
           title="Previsão do COVID - Brasil")

# Preparando o estilo
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1
p.yaxis.axis_label = "Indivíduos"
p.xaxis.axis_label = "Dias"

# Incluindo as curvas
i_p = p.line(x='Data', y='Infectados', legend_label="Infectados", line_cap="round", line_width=3, color="#ffd885", source=source)
m_p = p.line(x='Data', y='Mortes', legend_label="Mortes", line_cap="round", line_width=3, color="#de425b", source=source)
r_p = p.line(x='Data', y='Removidos', legend_label="Removidos", line_cap="round", line_width=3, color="#99d594", source=source)

mp_p = p.line(x='DataModelo', y='InfecModelo', legend_label="Infectados - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#f57f17", source=source)

renders = [i_p, m_p, r_p, mp_p]

if r_included:
    rp_p = p.line(x='DataModelo', y='RemovModelo', legend_label="Removidos - Modelo", line_dash="dashed", line_cap="round", line_width=4, color="#1b5e20", source=source)
    renders.append(rp_p)

# Colocando as legendas
p.legend.click_policy="hide"
p.legend.location = "top_left"

# Incluindo a ferramenta de hover
p.add_tools(HoverTool(
    tooltips=[
        ( 'Indivíduos', '$y{0.00 a}' ),
        ( 'Data',       '@ds/@ms/@ys'),
    ],
    renderers=renders
))

show(p)

BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('AccumInfect', 186), ('Data', 65), ('DataModelo', 185), ('InfecModelo', 185), ('Infectados', 65), ('Mortes', 65), ('RemovModelo', 185), ('Removidos', 65), ('SucetModelo', 185), ('d', 65), ('ds', 185), ('m', 65), ('ms', 185), ('y', 65), ('ys', 185)